Clear out your workspace
# Remove user-created objects from the global environment so the analysis
# starts clean (all = FALSE keeps hidden objects whose names start with ".").
# NOTE(review): rm(list = ls()) in scripts is generally discouraged; a fresh
# R session is the only truly clean slate.
rm(list=ls(all=FALSE))
# Confirm the environment is empty (expected: character(0))
ls()
## character(0)
Load packages and a helper function
library(car)
library(dplyr)
library(ggplot2)
library(openxlsx)
library(tidyverse)
library(data.table)
# S3 method so is.nan() works on a whole data frame: apply is.nan() to each
# column and bind the logical vectors into a matrix with the same dimensions
# as x, suitable for logical indexing (used below to turn NaN into NA).
is.nan.data.frame <- function(x) {
  nan_by_column <- lapply(x, is.nan)
  do.call(cbind, nan_by_column)
}
Data are stored in the tab “HA - per well” in the excel file “Data_paper” in Sandra’s OneDrive: https://panthers-my.sharepoint.com/personal/mclellan_uwm_edu/Documents/Normalization paper Sept 2020/.
# Read the tab-delimited export of the "HA - per well" sheet into `data`.
# Fix: spell out header = TRUE instead of relying on partial argument
# matching (`h=T`), which is fragile and uses the reassignable alias T.
data<-read.table("~/OneDrive - UWM/Normalization paper Sept 2020/Data/Data_paper_HA.txt", sep = "\t", header = TRUE)
# Inspect the first rows to confirm the columns parsed as expected
head(data)
## CV Daily.Avg.Flow..MGD. TSS..mg.L. pH Total.P..mg.L. BOD..mg.L. Temp...C.
## 1 322 9.73 144 7.7 2.90 134 20.6
## 2 322 9.73 144 7.7 2.90 134 20.6
## 3 322 9.73 144 7.7 2.90 134 20.6
## 4 323 9.11 184 7.7 3.71 157 21.7
## 5 323 9.11 184 7.7 3.71 157 21.7
## 6 350 7.82 154 7.7 4.18 174 9.0
## Conductivity..uS.cm. City Sample_ID
## 1 NA Brookfield BRK 9/14-1 HA frozen filter
## 2 NA Brookfield BRK 9/14-1 HA frozen filter
## 3 NA Brookfield BRK 9/14-1 HA frozen filter
## 4 NA Brookfield BRK 9/15-1 HA frozen filter
## 5 NA Brookfield BRK 9/15-1 HA frozen filter
## 6 NA Brookfield BRK 9/21-1 HA frozen filter
## Run_N1N2 Well Collection_Date_Start N1_avg N1_LOD N1_LOQ
## 1 9-20 N1N2 BCOV (HEPG) E05 09/13/2020 1315.20 2121.027 7166.087
## 2 9-20 N1N2 BCOV (HEPG) E06 09/13/2020 648.96 2121.027 7166.087
## 3 9-22 BCOV F08 09/13/2020 NA NA NA
## 4 9-20 N1N2 BCOV (HEPG) G05 09/14/2020 3206.40 2121.027 7166.087
## 5 9-20 N1N2 BCOV (HEPG) G06 09/14/2020 4166.40 2121.027 7166.087
## 6 9-24 N1N2 mad stnd C05 09/20/2020 10051.20 2121.027 7166.087
## N2_avg N2_LOD N2_LOQ BCoV PMMoV HF183 N1_LevelDetection
## 1 1315.20 3569.441 6736.762 NA NA NA BLD
## 2 648.96 3569.441 6736.762 NA NA NA BLD
## 3 NA NA NA 1.588519 NA NA <NA>
## 4 1920.00 3569.441 6736.762 NA NA NA BLQ
## 5 1190.40 3569.441 6736.762 NA NA NA BLQ
## 6 4694.40 3569.441 6736.762 NA NA NA Quantifiable
## N2_LevelDetection N1N2_avg N1_positive N2_positive ddPCR_run_info
## 1 BLD 1315.20 2 2 before_11-20-2020
## 2 BLD 648.96 1 1 before_11-20-2020
## 3 <NA> NA 0 0 before_11-20-2020
## 4 BLD 2563.20 5 3 before_11-20-2020
## 5 BLD 2678.40 7 2 before_11-20-2020
## 6 BLQ 7372.80 15 7 before_11-20-2020
### count number of N1/N2 assay replicates per filter
# Fix: test missingness with !is.na() instead of comparing the numeric
# N1_avg column to the string "NA" — the original only worked by accident
# (NA comparisons evaluate to NA and are dropped by subset()).
N1N2.assay.count<-aggregate(N1_avg ~ Sample_ID, subset(data, !is.na(N1_avg)), FUN = length)
names(N1N2.assay.count)[2]<-"N1N2_n_assay_per_filter"
# Left-join the per-filter assay counts back onto the full data set
data<-as.data.frame(setDT(N1N2.assay.count)[data, on="Sample_ID"])
### count number of filter replicates per sample
# First collapse to one row per (CV, Sample_ID), then count filters per CV
filter.count<-aggregate(N1_avg ~ CV + Sample_ID, subset(data, !is.na(N1_avg)), FUN = length)
filter.count<-aggregate(N1_avg ~ CV, filter.count, FUN = length)
names(filter.count)[2]<-"n_filter"
data<-as.data.frame(setDT(filter.count)[data, on="CV"])
N1 vs N2
# Keep only assays where both N1 and N2 are above the limit of quantification
data.quantifiable<-subset(data, N1_LevelDetection=="Quantifiable" & N2_LevelDetection=="Quantifiable")
# All data (N2-single quenched probe and N2-double quenched probe)
nrow(data.quantifiable)
## [1] 408
# Spearman rank correlation between log10(N1) and log10(N2).
# Fix: `method` takes a single string — no need to wrap it in c().
cor.test(log10(data.quantifiable$N1_avg), log10(data.quantifiable$N2_avg), method = "spearman")
## Warning in cor.test.default(log10(data.quantifiable$N1_avg),
## log10(data.quantifiable$N2_avg), : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: log10(data.quantifiable$N1_avg) and log10(data.quantifiable$N2_avg)
## S = 1575173, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8608441
# Per-assay ratio of log10(N1) to log10(N2); mean and sd summarize how
# closely the two assays agree. Computing the ratio once avoids repeating
# the same expression in each mean()/sd() call.
ratio.all <- log10(data.quantifiable$N1_avg)/log10(data.quantifiable$N2_avg)
mean(ratio.all)
## [1] 1.055579
sd(ratio.all)
## [1] 0.03750799
# N1 + N2-single quenched probe
data.N2singlequenched<-subset(data.quantifiable, ddPCR_run_info == "before_11-20-2020")
nrow(data.N2singlequenched)
## [1] 321
ratio.single <- log10(data.N2singlequenched$N1_avg)/log10(data.N2singlequenched$N2_avg)
mean(ratio.single)
## [1] 1.054598
sd(ratio.single)
## [1] 0.03593595
# N1 + N2-double quenched probe
data.N2doublequenched<-subset(data.quantifiable, ddPCR_run_info == "after_11-20-2020")
nrow(data.N2doublequenched)
## [1] 87
ratio.double <- log10(data.N2doublequenched$N1_avg)/log10(data.N2doublequenched$N2_avg)
mean(ratio.double)
## [1] 1.059196
sd(ratio.double)
## [1] 0.04284842
How many times do the N1 and N2 assays disagree on quantifiability?
#### ALL DATES ####
#... N1 > LOQ & N2 < LOQ
comparison.1<-subset(data, N1_LevelDetection=="Quantifiable" & N2_LevelDetection!="Quantifiable")
nrow(comparison.1)
## [1] 136
#... N1 < LOQ & N2 > LOQ
comparison.2<-subset(data, N1_LevelDetection!="Quantifiable" & N2_LevelDetection=="Quantifiable")
nrow(comparison.2)
## [1] 19
#All N1N2 data (rows where both detection calls are present)
# Fix: use !is.na() rather than comparing to the string "NA"; these columns
# hold "BLD"/"BLQ"/"Quantifiable" or real NA, so the original comparison only
# excluded NA rows as a side effect of subset() dropping NA conditions.
comparison.3<-subset(data, !is.na(N1_LevelDetection) & !is.na(N2_LevelDetection))
nrow(comparison.3)
## [1] 817
#### BEFORE 11-20-2020 ####
# Restrict each comparison set to ddPCR runs before the probe change
#... N1 > LOQ & N2 < LOQ
comparison.1.before<-subset(comparison.1, ddPCR_run_info=="before_11-20-2020")
nrow(comparison.1.before)
## [1] 112
#... N1 < LOQ & N2 > LOQ
comparison.2.before<-subset(comparison.2, ddPCR_run_info=="before_11-20-2020")
nrow(comparison.2.before)
## [1] 19
#All N1N2 data
comparison.3.before<-subset(comparison.3, ddPCR_run_info=="before_11-20-2020")
nrow(comparison.3.before)
## [1] 689
#### AFTER 11-20-2020 ####
# Same three comparisons for runs after the switch to the double-quenched probe
#... N1 > LOQ & N2 < LOQ
comparison.1.after<-subset(comparison.1, ddPCR_run_info=="after_11-20-2020")
nrow(comparison.1.after)
## [1] 24
#... N1 < LOQ & N2 > LOQ
comparison.2.after<-subset(comparison.2, ddPCR_run_info=="after_11-20-2020")
nrow(comparison.2.after)
## [1] 0
#All N1N2 data
comparison.3.after<-subset(comparison.3, ddPCR_run_info=="after_11-20-2020")
nrow(comparison.3.after)
## [1] 128
Plot N2 vs N1
# Scatter plot of log10(N1) vs log10(N2) for quantifiable assays, colored by
# city, with the 1:1 identity line (black) and a linear fit (red).
ggplot(data.quantifiable, aes(x=log10(N2_avg), y=log10(N1_avg))) +
geom_point(aes(color=City),size=0.5) +
theme_classic() +
# Identical x/y limits so the 1:1 line is visually meaningful
scale_x_continuous(limits = c(2.5, 5.5)) +
scale_y_continuous(limits = c(2.5, 5.5)) +
labs(y = "log10 - N1 cp/L",
x = "log10 - N2 cp/L",
title = "1 point = one assay when quantifiable") +
geom_abline(intercept = 0, slope = 1) +
stat_smooth(method = "lm", col = "red",size=0.5)
## `geom_smooth()` using formula 'y ~ x'
# Collapse assay-level rows to one row per filter (Sample_ID): average each
# target and compute relative standard deviations across assay replicates.
data.per.filter<-data %>%
group_by(Sample_ID, City, CV, Collection_Date_Start) %>%
summarize(N1.avg = mean(N1_avg, na.rm=TRUE),
N1.rsd = sd(N1_avg, na.rm=TRUE)/mean(N1_avg, na.rm=TRUE),
# Detection level encoded as a summed score (Quantifiable=100, BLQ=1, BLD=0);
# decoded back into a categorical call after aggregation (see below)
N1.LevelDetection = sum(ifelse(N1_LevelDetection=="Quantifiable", 100, ifelse(N1_LevelDetection=="BLQ", 1,0)), na.rm=TRUE),
N1.count = sum(!is.na(N1_LevelDetection)),
BCoV.avg_recovery_rate = mean(BCoV, na.rm=TRUE),
PMMoV.avg = mean(PMMoV, na.rm=TRUE),
HF183.avg = mean(HF183, na.rm=TRUE),
TSS.avg = mean(as.numeric(as.character(TSS..mg.L.)),na.rm = TRUE),
Flow.avg = mean(as.numeric(as.character(Daily.Avg.Flow..MGD.)),na.rm = TRUE),
.groups = 'drop')
data.per.filter<-as.data.frame(data.per.filter)
# mean()/sd() over all-NA groups return NaN; convert those back to NA
data.per.filter.NaN<-is.nan.data.frame(data.per.filter)
data.per.filter[data.per.filter.NaN]<-NA
# sub() only replaces the first "." per name, hence the double application
names(data.per.filter)<-sub("[.]", "_", names(data.per.filter)); names(data.per.filter)<-sub("[.]", "_", names(data.per.filter)) # replace "." by "_"in header
# Recompute the level of detection
# Conservative call: Quantifiable only when every assay scored 100 (mean
# score == 100); mean score >= 1 maps to BLQ, otherwise BLD
data.per.filter$N1_LowestLevelDetection<-ifelse(data.per.filter$N1_LevelDetection/data.per.filter$N1_count==100, "Quantifiable", ifelse(data.per.filter$N1_LevelDetection/data.per.filter$N1_count>=1, "BLQ", "BLD"))
# Optimistic call: Quantifiable if any assay was quantifiable (sum >= 100),
# BLQ if any assay scored at least BLQ (sum > 0), else BLD
data.per.filter$N1_LevelDetection<-ifelse(data.per.filter$N1_LevelDetection>=100, "Quantifiable", ifelse(data.per.filter$N1_LevelDetection>0, "BLQ", "BLD"))
# Collapse per-filter rows to one row per sampling day (CV within City/date),
# reusing the 100/1/0 detection-score encoding from the per-filter step.
data.per.day<-data.per.filter %>%
group_by(CV, City,Collection_Date_Start) %>%
summarize(N1.avg.day = mean(N1_avg, na.rm=TRUE),
N1.LevelDetection.day = sum(ifelse(N1_LevelDetection=="Quantifiable", 100, ifelse(N1_LevelDetection=="BLQ", 1,0)), na.rm=TRUE),
N1.LowestLevelDetection.day = sum(ifelse(N1_LowestLevelDetection=="Quantifiable", 100, ifelse(N1_LowestLevelDetection=="BLQ", 1,0)), na.rm=TRUE),
N1.count.day = sum(!is.na(N1_LevelDetection)),
# RSD across duplicate filters within the day
N1.rsd.day = sd(N1_avg, na.rm=TRUE)/mean(N1_avg, na.rm=TRUE),
BCoV.avg.day = mean(BCoV_avg_recovery_rate, na.rm=TRUE),
BCoV.rsd.day = sd(BCoV_avg_recovery_rate, na.rm=TRUE)/mean(BCoV_avg_recovery_rate, na.rm=TRUE),
PMMoV.avg.day = mean(PMMoV_avg, na.rm=TRUE),
PMMoV.rsd.day = sd(PMMoV_avg, na.rm=TRUE)/mean(PMMoV_avg, na.rm=TRUE),
HF183.avg.day = mean(HF183_avg, na.rm=TRUE),
HF183.rsd.day = sd(HF183_avg, na.rm=TRUE)/mean(HF183_avg, na.rm=TRUE),
TSS.avg.day = mean(as.numeric(as.character(TSS_avg)),na.rm = TRUE),
Flow.avg.day = mean(as.numeric(as.character(Flow_avg)),na.rm = TRUE),
.groups = 'drop')
data.per.day<-as.data.frame(data.per.day)
# Convert NaN produced by all-NA groups back to NA
data.per.day.NaN<-is.nan.data.frame(data.per.day)
data.per.day[data.per.day.NaN]<-NA
# Replace the first two "." in each column name with "_" (sub() is first-match)
names(data.per.day)<-sub("[.]", "_", names(data.per.day)); names(data.per.day)<-sub("[.]", "_", names(data.per.day))
# Conservative daily call: Quantifiable only when every filter scored 100
data.per.day$N1_LowestLevelDetection_day<-ifelse(data.per.day$N1_LowestLevelDetection_day/data.per.day$N1_count_day==100, "Quantifiable", ifelse(data.per.day$N1_LowestLevelDetection_day/data.per.day$N1_count_day>=1, "BLQ", "BLD"))
# Optimistic daily call: Quantifiable if any filter was Quantifiable
data.per.day$N1_LevelDetection_day<-ifelse(data.per.day$N1_LevelDetection_day>=100, "Quantifiable", ifelse(data.per.day$N1_LevelDetection_day>0, "BLQ", "BLD"))
# All data
# Overall BCoV process-control recovery across all daily means
mean(data.per.day$BCoV_avg_day, na.rm = TRUE)
## [1] 4.937945
sd(data.per.day$BCoV_avg_day, na.rm = TRUE)
## [1] 4.232944
# ANOVA BCoV recovery vs WWTPs
## ANOVA
# One-way ANOVA of log10 BCoV recovery by treatment plant
mod1 <- aov(log10(BCoV_avg_day) ~ City, data = data.per.day)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## City 11 26.52 2.4107 26.91 <2e-16 ***
## Residuals 405 36.28 0.0896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Diagnostic plots (residuals, QQ, scale-location, leverage) for the fit
plot(mod1)
## Test post-hoc
# Tukey HSD pairwise comparisons between plants, summarized as compact letters
post.hoc1<-TukeyHSD(mod1, ordered = TRUE, conf.level = 0.95)
label1<-multcompView::multcompLetters(post.hoc1$City[,"p adj"])
## Plot
# Build a City -> letter table and join it onto the daily data for plotting
label1.df<-as.data.frame(label1$Letters)
names(label1.df)<-"Letter"
label1.df$City<-row.names(label1.df)
data.per.day.bcov.plot<-setDT(label1.df)[data.per.day, on="City"]
# Boxplot + jittered points of daily BCoV recovery per plant, annotated with
# the Tukey compact letters at y = 40
ggplot(data.per.day.bcov.plot, aes(x=City, y=BCoV_avg_day)) +
geom_jitter(width=0.2) +
# Hide boxplot outliers since the jitter layer already shows every point
geom_boxplot(outlier.colour=NA, fill=NA, colour="grey20") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
labs(y = "BCoV recovery (%)",
x = "",
title = "") +
geom_text(aes(x = City, y = 40, label = Letter))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
## More specific info
# Range and sample size of daily BCoV recovery for two contrasting plants
BCOV.Waukesha<-subset(data.per.day.bcov.plot, City == "Waukesha")
min(BCOV.Waukesha$BCoV_avg_day)
## [1] 0.2123006
max(BCOV.Waukesha$BCoV_avg_day)
## [1] 2.984008
nrow(BCOV.Waukesha)
## [1] 41
BCOV.Milwaukee_JI<-subset(data.per.day.bcov.plot, City == "Milwaukee_JI")
min(BCOV.Milwaukee_JI$BCoV_avg_day)
## [1] 0.892347
max(BCOV.Milwaukee_JI$BCoV_avg_day)
## [1] 27.95129
nrow(BCOV.Milwaukee_JI)
## [1] 51
# Extract city, daily TSS, and daily BCoV recovery, and coerce to numeric
cor.data0<-data.per.day[,c("City", "TSS_avg_day", "BCoV_avg_day")]
cor.data0[,2]<-as.numeric(as.character(cor.data0[,2]))
cor.data0[,3]<-as.numeric(as.character(cor.data0[,3]))
# Fix: drop missing values with !is.na() instead of comparing to the string
# "NA" (the original only worked because NA conditions are dropped by
# subset()). NOTE(review): BCoV (column 3) is intentionally left unfiltered
# here; cor.test() below removes incomplete pairs itself.
cor.data0<-subset(cor.data0, !is.na(cor.data0[,1]) & !is.na(cor.data0[,2]))
nrow(cor.data0)
## [1] 381
# Spearman correlation of daily TSS vs BCoV recovery
cor.test(cor.data0[,2], cor.data0[,3], method = "spearman")
## Warning in cor.test.default(cor.data0[, 2], cor.data0[, 3], method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: cor.data0[, 2] and cor.data0[, 3]
## S = 11538573, p-value = 6.39e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2517899
# Scatter of daily TSS vs BCoV recovery, colored by plant.
# Fix: map colour to the City column by name inside aes() instead of indexing
# the data frame directly (cor.data0[,1]); the mapping is identical but the
# legend is now titled "City" rather than "cor.data0[, 1]".
ggplot(cor.data0, aes(x=TSS_avg_day, y=BCoV_avg_day, colour=City)) +
geom_point()
# Daily PMMoV concentration: overall mean, sd, and relative sd
mean(data.per.day$PMMoV_avg_day, na.rm = TRUE)
## [1] 23673005
sd(data.per.day$PMMoV_avg_day, na.rm = TRUE)
## [1] 43777087
sd(data.per.day$PMMoV_avg_day, na.rm=TRUE)/mean(data.per.day$PMMoV_avg_day, na.rm=TRUE)
## [1] 1.849241
# ANOVA: PMMoV vs WWTPs (NOTE(review): original comment said "recovery", but
# PMMoV is plotted below in cp/L, i.e. a concentration, not a recovery rate)
mod2 <- aov(log10(PMMoV_avg_day) ~ City, data = data.per.day)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## City 11 9.213 0.8376 11.66 <2e-16 ***
## Residuals 354 25.425 0.0718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 52 observations deleted due to missingness
# ANOVA diagnostic plots
plot(mod2)
# Extract data
cor.pmmov<-data.per.day[,c("City", "PMMoV_avg_day", "Flow_avg_day")]
# log10-transform both PMMoV and flow before correlating
cor.pmmov[,2]<-log10(as.numeric(as.character(cor.pmmov[,2])))
cor.pmmov[,3]<-log10(as.numeric(as.character(cor.pmmov[,3])))
# Compute correlation per plants
# Spearman rho between log10(PMMoV) and log10(flow) within each plant, plus
# the number of complete pairs used
cor.PMMoV.results <- cor.pmmov %>%
group_by(City) %>%
summarize(rho=cor(PMMoV_avg_day, Flow_avg_day, use="complete.obs", method = "spearman"),
n = sum(!is.na(PMMoV_avg_day) & !is.na(Flow_avg_day)),
.groups = 'drop')
# Summarize correlation strength (absolute rho) across plants
cor.PMMoV.results$rho.abs<-abs(cor.PMMoV.results$rho)
min(cor.PMMoV.results$rho.abs); max(cor.PMMoV.results$rho.abs)
## [1] 0.1074237
## [1] 0.688731
mean(cor.PMMoV.results$rho.abs); median(cor.PMMoV.results$rho.abs)
## [1] 0.3085357
## [1] 0.2879652
# Prepare data for plot
# Join per-plant rho back onto the data; legend labels become "City (rho)"
cor.pmmov.cor<-as.data.frame(setDT(cor.PMMoV.results)[cor.pmmov, on="City"])
cor.pmmov.cor$WWTPs<-paste0(cor.pmmov.cor$City, " (", round(cor.pmmov.cor$rho, digits = 2), ")")
# Plot
ggplot(cor.pmmov.cor, aes(x=Flow_avg_day, y=PMMoV_avg_day, colour=WWTPs)) +
geom_point() +
labs(x = "Average daily flow - log10(MDG)",
y = "PMMoV - log10(cp/L)") +
theme_bw()
# Select data
cor.pmmov.rsd<-data.per.day[,c("City", "PMMoV_avg_day", "Flow_avg_day")]
# Fix: drop missing rows with !is.na() instead of comparing numeric columns
# to the string "NA" (the original only worked because NA comparisons are
# dropped by subset())
cor.pmmov.rsd<-subset(cor.pmmov.rsd, !is.na(PMMoV_avg_day) & !is.na(Flow_avg_day))
cor.pmmov.rsd[,2]<-log10(as.numeric(as.character(cor.pmmov.rsd[,2])))
cor.pmmov.rsd[,3]<-as.numeric(as.character(cor.pmmov.rsd[,3]))
#Compute means per plant
# Per plant: RSD (%) of log10 PMMoV, mean flow, and number of days
cor.pmmov.rsd.results<-cor.pmmov.rsd %>%
group_by(City) %>%
summarize(PMMoV.rsd.mean = (sd(PMMoV_avg_day, na.rm=TRUE)/mean(PMMoV_avg_day, na.rm=TRUE))*100,
Flow.mean = mean(Flow_avg_day, na.rm=TRUE),
n = sum(!is.na(City)),
.groups = 'drop')
#plot(PMMoV_rsd_day~Flow_avg_day, data=subse
# Correlation
# Spearman correlation of per-plant PMMoV variability vs mean flow
cor.test(cor.pmmov.rsd.results$PMMoV.rsd.mean, cor.pmmov.rsd.results$Flow.mean, method = "spearman", na.action=na.omit)
##
## Spearman's rank correlation rho
##
## data: cor.pmmov.rsd.results$PMMoV.rsd.mean and cor.pmmov.rsd.results$Flow.mean
## S = 354, p-value = 0.4571
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2377622
# Prepare data for plot
# Legend labels are "City (n)". NOTE(review): the PMMoV-vs-flow plot above
# labels with rho instead; confirm n (sample size) is intended here.
cor.pmmov.rsd.results$WWTPs<-paste0(cor.pmmov.rsd.results$City, " (", round(cor.pmmov.rsd.results$n, digits = 2), ")")
# Plot
ggplot(cor.pmmov.rsd.results, aes(x=Flow.mean, y=PMMoV.rsd.mean, colour=WWTPs)) +
geom_point() +
labs(x = "Averaged daily flow per WWTP - (MDG)",
y = "Averaged PMMoV RSD per WWTP (%)") +
theme_bw()
#ggplot(cor.pmmov.rsd.results, aes(x=log10(Flow.mean), y=log10(PMMoV.rsd.mean*100), colour=City)) +
# geom_point() +
# labs(x = "Averaged daily flow per WWTP - log10(MDG)",
# y = "PMMoV RSD log10(%)") +
# theme_bw()
#ggplot(cor.pmmov.rsd.results, aes(x=log10(Flow.mean), y=log10(PMMoV.rsd.mean*100), colour=City)) +
# geom_point() +
# labs(x = "Averaged daily flow per WWTP - log10(MDG)",
# y = "PMMoV RSD log10(%)") +
# theme_bw()
# Daily HF183 concentration: overall mean, sd, and relative sd
mean(data.per.day$HF183_avg_day, na.rm = TRUE)
## [1] 700853478
sd(data.per.day$HF183_avg_day, na.rm = TRUE)
## [1] 374103910
sd(data.per.day$HF183_avg_day, na.rm=TRUE)/mean(data.per.day$HF183_avg_day, na.rm=TRUE)
## [1] 0.5337833
# ANOVA: HF183 vs WWTPs (the original comment said "PMMoV recovery", but this
# model is fit to HF183_avg_day)
mod3 <- aov(log10(HF183_avg_day) ~ City, data = data.per.day)
summary(mod3)
## Df Sum Sq Mean Sq F value Pr(>F)
## City 11 6.548 0.5952 17.07 <2e-16 ***
## Residuals 287 10.006 0.0349
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 119 observations deleted due to missingness
# ANOVA diagnostic plots
plot(mod3)
# Extract data
cor.hf183<-data.per.day[,c("City", "HF183_avg_day", "Flow_avg_day")]
# log10-transform both HF183 and flow before correlating
cor.hf183[,2]<-log10(as.numeric(as.character(cor.hf183[,2])))
cor.hf183[,3]<-log10(as.numeric(as.character(cor.hf183[,3])))
# Compute correlation per plants
# Spearman rho between log10(HF183) and log10(flow) within each plant, plus
# the number of complete pairs used
cor.HF183.results <- cor.hf183 %>%
group_by(City) %>%
summarize(rho=cor(HF183_avg_day, Flow_avg_day, use="complete.obs", method = "spearman"),
n = sum(!is.na(HF183_avg_day) & !is.na(Flow_avg_day)),
.groups = 'drop')
# Summarize correlation strength (absolute rho) across plants
cor.HF183.results$rho.abs<-abs(cor.HF183.results$rho)
min(cor.HF183.results$rho.abs); max(cor.HF183.results$rho.abs)
## [1] 0.06593407
## [1] 0.6560381
mean(cor.HF183.results$rho.abs); median(cor.HF183.results$rho.abs)
## [1] 0.3476914
## [1] 0.288188
# Prepare data for plot
# Join per-plant rho back onto the data; legend labels become "City (rho)"
cor.hf183.cor<-as.data.frame(setDT(cor.HF183.results)[cor.hf183, on="City"])
cor.hf183.cor$WWTPs<-paste0(cor.hf183.cor$City, " (", round(cor.hf183.cor$rho, digits = 2), ")")
# Plot
ggplot(cor.hf183.cor, aes(x=Flow_avg_day, y=HF183_avg_day, colour=WWTPs)) +
geom_point() +
labs(x = "Average daily flow - log10(MDG)",
y = "HF183 - log10(cp/L)") +
theme_bw()
# Select data
cor.hf183.rsd<-data.per.day[,c("City", "HF183_avg_day", "Flow_avg_day")]
# Fix: drop missing rows with !is.na() instead of comparing numeric columns
# to the string "NA" (the original only worked because NA comparisons are
# dropped by subset())
cor.hf183.rsd<-subset(cor.hf183.rsd, !is.na(HF183_avg_day) & !is.na(Flow_avg_day))
cor.hf183.rsd[,2]<-log10(as.numeric(as.character(cor.hf183.rsd[,2])))
cor.hf183.rsd[,3]<-as.numeric(as.character(cor.hf183.rsd[,3]))
#Compute means per plant
# Per plant: RSD (%) of log10 HF183, mean flow, and number of days
cor.hf183.rsd.results<-cor.hf183.rsd %>%
group_by(City) %>%
summarize(HF183.rsd.mean = (sd(HF183_avg_day, na.rm=TRUE)/mean(HF183_avg_day, na.rm=TRUE))*100,
Flow.mean = mean(Flow_avg_day, na.rm=TRUE),
n = sum(!is.na(City)),
.groups = 'drop')
# Correlation
# Spearman correlation of per-plant HF183 variability vs mean flow
cor.test(cor.hf183.rsd.results$HF183.rsd.mean, cor.hf183.rsd.results$Flow.mean, method = "spearman", na.action=na.omit)
##
## Spearman's rank correlation rho
##
## data: cor.hf183.rsd.results$HF183.rsd.mean and cor.hf183.rsd.results$Flow.mean
## S = 438, p-value = 0.0793
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5314685
# Prepare data for plot
# Legend labels are "City (n)". NOTE(review): the HF183-vs-flow plot above
# labels with rho instead; confirm n (sample size) is intended here.
cor.hf183.rsd.results$WWTPs<-paste0(cor.hf183.rsd.results$City, " (", round(cor.hf183.rsd.results$n, digits = 2), ")")
#plot
ggplot(cor.hf183.rsd.results, aes(x=Flow.mean, y=HF183.rsd.mean, colour=WWTPs)) +
geom_point() +
labs(x = "Averaged daily flow per WWTP - (MDG)",
y = "Averaged HF183 RSD per WWTP (%)") +
theme_bw()
Between duplicate filters: differences in variability between the controls and N1
# Restrict to days where even the most conservative call is Quantifiable
data.per.day.lq<-subset(data.per.day, N1_LowestLevelDetection_day=="Quantifiable")
# Correlate day-level N1 variability (RSD across duplicate filters) with the
# variability of the BCoV and PMMoV controls on the same days
data.per.day.lq %>%
summarize(rho_BCoV=cor(N1_rsd_day, BCoV_rsd_day, method = "spearman", use="complete.obs"),
n_BCoV = sum(!is.na(N1_rsd_day) & !is.na(BCoV_rsd_day)),
rho_PMMoV=cor(N1_rsd_day, PMMoV_rsd_day, method = "spearman", use="complete.obs"),
n_PMMoV = sum(!is.na(N1_rsd_day) & !is.na(PMMoV_rsd_day)))
## rho_BCoV n_BCoV rho_PMMoV n_PMMoV
## 1 0.1131425 62 0.1919954 54
# Correlation between the two controls' variabilities across all days
data.per.day %>%
summarize(rho_PMMoV_BCoV=cor(BCoV_rsd_day, PMMoV_rsd_day, method = "spearman", use="complete.obs"),
n_PMMoV_BCoV = sum(!is.na(PMMoV_rsd_day) & !is.na(BCoV_rsd_day)))
## rho_PMMoV_BCoV n_PMMoV_BCoV
## 1 0.3690999 84